install.packages("fs",repos = "http://cran.us.r-project.org")
The downloaded binary packages are in
/var/folders/_d/hvsdqqnd3jddpd6y2p2zdpt40000gn/T//RtmpzcCip1/downloaded_packages
install.packages("fs",repos = "http://cran.us.r-project.org")
The downloaded binary packages are in
/var/folders/_d/hvsdqqnd3jddpd6y2p2zdpt40000gn/T//RtmpzcCip1/downloaded_packages
library("ggridges")
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
Attaching package: 'janitor'
The following objects are masked from 'package:stats':
chisq.test, fisher.test
library(scales)
Attaching package: 'scales'
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
bnb <- dsbox::edibnb
bnb <- bnb |> group_by(neighbourhood) |> summarise(median(review_scores_rating, na.rm=TRUE))bnb <- dsbox::edibnb
bnb <- bnb |> mutate(neighbourhood=fct_reorder(neighbourhood, review_scores_rating, .fun='median'))Warning: There was 1 warning in `mutate()`.
ℹ In argument: `neighbourhood = fct_reorder(neighbourhood,
review_scores_rating, .fun = "median")`.
Caused by warning:
! `fct_reorder()` removing 2177 missing values.
ℹ Use `.na_rm = TRUE` to silence this message.
ℹ Use `.na_rm = FALSE` to preserve NAs.
ggplot(bnb, aes(x=review_scores_rating, y=neighbourhood)) +
geom_density_ridges() +
labs(
title="Average AirBnB Ratings across Neighbourhoods",
y="Neighbourhood",
x="Review Rating"
)Picking joint bandwidth of 1.21
Warning: Removed 2177 rows containing non-finite outside the scale range
(`stat_density_ridges()`).
Interpretation:
Interesting to see that most values are near 100.
There are also blips just around the 80 marks, and this is consistent across every neighbourhood.
I can’t tell from the graph itself, but it could be that number of observations are so small as the review rating decreases that it shows a continuing line. The other possibility is that those values are all nonexistent, but I do not think that would be an accurate representation on the graph. “Zoomed out” at this scale, it is hard to decipher the meaning.
library(fs)
# get a list of files with "Foreign Connected PAC" in their names
list_of_files <- dir_ls(path = "data", regexp = "Foreign Connected PAC")
# read all files and row bind them
# keeping track of the file name in a new column called year
pac <- read_csv(list_of_files, id = "year")Rows: 2394 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): PAC Name (Affiliate), Country of Origin/Parent Company, Total, Dems...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read all files and row bind them
# keeping track of the file name in a new column called year
pac <- read_csv(list_of_files, id = "year")Rows: 2394 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): PAC Name (Affiliate), Country of Origin/Parent Company, Total, Dems...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pac <- clean_names(pac)
pac <- pac |>
extract(
year,
into = "year",
regex = "-([0-9]{4})"
) |> mutate(year=as.integer(year))
pac <- pac |> extract(dems, into="dems", regex="([0-9]+)") |> mutate(dems=as.double(dems))
pac <- pac |> extract(repubs, into="repubs", regex="([0-9]+)") |> mutate(repubs=as.double(repubs))
pac <- pac |> extract(country_of_origin_parent_company, c("Country", "Parent Company"), "([\\w ]+)/([\\w\\D ]+)") pac2 <- pac |> pivot_longer(cols=c("dems", "repubs"), names_to="party", values_to="amount")
pac2['party'] = case_when(pac2['party']=='dems' ~ 'Democrat', pac2['party']=='repubs' ~ 'Republican',
.default='Not Listed')uk <- pac2 |>
filter(Country == "UK") |>
group_by(year, party) |>
summarise(total=sum(amount))`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
ggplot(uk, aes(x=year, y=total, color=party)) +
geom_line(linewidth=1) +
# make sacle 1M, 2M, 3M
scale_y_continuous(labels = label_currency(scale_cut=cut_short_scale())) +
theme(
panel.background = element_rect(fill="white"),
panel.grid.major.x = element_line(color="lightgrey"),
panel.grid.major.y = element_line(color="lightgrey"),
panel.grid.minor.x = element_line(color="lightgrey"),
panel.grid.minor.y = element_line(color="lightgrey"),
legend.position=c(.9, .15),
legend.background = element_blank(),
plot.margin = margin(1, 1, 5, 1)
) +
scale_color_manual(labels = c("Democrat", "Republican"), values = c("blue", "red")) +
labs(
color = "Party", x="", y="", title="Contributions to US political parties from UK-connected PACs"
) +
# allow for text outside of plotting area
coord_cartesian(clip="off") +
annotate("text", label="Source: OpenSecrets.org", x=2020.7, y=1e6, vjust=7, size=3) +
annotate("text", label="Total Amount", x=2000, y=1e6, vjust=-6, size=4, hjust=.3, angle=90) +
annotate("text", label="Year", x=2000, y=1e6, vjust=5, size=4, hjust=1)Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
france <- pac2 |>
filter(Country == "France") |>
group_by(year, party) |>
summarise(total=sum(amount))`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
ggplot(france, aes(x=year, y=total, color=party)) +
geom_line(linewidth=1) +
# make sacle 1M, 2M, 3M
scale_y_continuous(labels = label_currency(scale_cut=cut_short_scale())) +
theme(
panel.background = element_rect(fill="white"),
panel.grid.major.x = element_line(color="lightgrey"),
panel.grid.major.y = element_line(color="lightgrey"),
panel.grid.minor.x = element_line(color="lightgrey"),
panel.grid.minor.y = element_line(color="lightgrey"),
legend.position=c(.9, .15),
legend.background = element_blank(),
plot.margin = margin(1, 1, 5, 1)
) +
scale_color_manual(labels = c("Democrat", "Republican"), values = c("blue", "red")) +
labs(
color = "Party", x="", y="", title="Contributions to US political parties from French-connected PACs"
) +
# allow for text outside of plotting area
coord_cartesian(clip="off") +
annotate("text", label="Source: OpenSecrets.org", x=2020.3, y=200000, vjust=6.5, size=3) +
annotate("text", label="Total Amount", x=2000, y=200000, vjust=-7, size=4, hjust=.4, angle=90) +
annotate("text", label="Year", x=2000, y=200000, vjust=5, size=4, hjust=1)It’s interesting to find that there is a similar trend between the UK and French political contributions. We see an increase around 2008 for democrats, and a decrease for republicans around 2018.
medhousing <- read_csv("data/median-housing.csv")Rows: 234 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (1): MSPUS
date (1): DATE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
recessions <- read_csv("data/recessions.csv")Rows: 34 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
date (2): Peak, Trough
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
medhousing <- rename(medhousing, all_of(c(date="DATE", price="MSPUS")))
library(scales)ggplot(medhousing, aes(x=date, y=price)) +
geom_line(color="#5571DC", linewidth=1) +
scale_y_continuous(limits=c(0,400000), labels=label_comma(), n.breaks=13) +
labs(
x="", y="Price", title="Median sales price of houses sold in the United States",
subtitle="Not seasonally adjusted"
) +
scale_x_date(date_breaks="5 years", date_labels="%Y") +
# annotation_custom(textGrob("Sources: Census; HUD"))
annotate("text", label="Sources: Census; HUD", x=ymd("2015-12-01"), y=0, vjust=5) +
coord_cartesian(clip = "off") +
theme(
panel.grid.minor.x=element_blank(),
panel.grid.minor.y=element_blank(),
panel.grid.major.x=element_blank(),
panel.background = element_rect(fill = "white"),
panel.grid.major.y=element_line(color="lightgrey"),
axis.ticks=element_blank(),
plot.margin = unit(c(1,1,2,1), "lines")
) library(data.table)
Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
# get min and max housing date
min_housing_date = min(medhousing$date)
max_housing_date = max(medhousing$date)
# i realized that the next 5 lines aren't needed.
recessions['occurredDuringHousing'] = ifelse(recessions$Peak >= min_housing_date & recessions$Trough <= max_housing_date, TRUE, FALSE)
medhousing['prev_year_price'] = shift(medhousing$price, 1, type='lag')
medhousing['decline'] = ifelse(medhousing$price < medhousing$prev_year_price, TRUE, FALSE)
medhousing['nextdate'] = shift(medhousing$date, -1)
# create base plot
baseplot <- ggplot(medhousing, aes(x=date, y=price)) +
scale_y_continuous(limits=c(0,400000), labels=label_comma(), n.breaks=13) +
labs(
x="", y="Price", title="Median sales price of houses sold in the United States",
subtitle="Not seasonally adjusted"
) +
scale_x_date(date_breaks="5 years", date_labels="%Y") +
annotate("text", label="Shaded areas indicate U.S. recessions", x=ymd("2015-12-01"), y=0, vjust=5, hjust=.7) +
annotate("text", label="Sources: Census; HUD", x=ymd("2015-12-01"), y=0, vjust=7) +
coord_cartesian(clip = "off") +
theme(
panel.grid.minor.x=element_blank(),
panel.grid.minor.y=element_blank(),
panel.grid.major.x=element_blank(),
panel.background = element_rect(fill = "white"),
panel.grid.major.y=element_line(color="lightgrey"),
axis.ticks=element_blank(),
plot.margin = unit(c(1,1,2,1), "lines")
)
# identify recessions
identifiedRecessions <- recessions |> filter(recessions$occurredDuringHousing == TRUE)
for (i in 1:nrow(identifiedRecessions)) {
row <- identifiedRecessions[i,]
baseplot <- baseplot + annotate("rect", xmin=row$Peak, xmax=row$Trough, ymin=0, ymax=40e4, fill="#E6E6E6", alpha=1)
}
baseplot + geom_line(color="#5571DC",linewidth=1)install.packages("zoo",repos = "http://cran.us.r-project.org")
The downloaded binary packages are in
/var/folders/_d/hvsdqqnd3jddpd6y2p2zdpt40000gn/T//RtmpzcCip1/downloaded_packages
library(zoo)
Attaching package: 'zoo'
The following objects are masked from 'package:data.table':
yearmon, yearqtr
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
medhousingSubset <- medhousing |> filter(date >= ymd("2019-01-01") & date <= ymd("2020-12-31"))
medhousingSubset <- medhousingSubset |> mutate(year = year(date)) |> mutate(quarter = case_when(
month(date) %in% c(1, 2, 3) ~ "Q1",
month(date) %in% c(4, 5, 6) ~ "Q2",
month(date) %in% c(7, 8, 9) ~ "Q3",
month(date) %in% c(10, 11, 12) ~ "Q4",
.default = ""
)) |> mutate(posixdate = as.POSIXct(date))
ggplot(medhousingSubset, aes(y=price, x=date)) +
geom_line(color="#5571DC", linewidth=1) + geom_point(color='#5571DC', fill='white', shape=21) +
theme(
panel.background = element_rect(fill="white"),
panel.grid.major = element_line(color="#EBEBEB"),
panel.grid.minor.y=element_line(color="#EBEBEB"),
strip.clip="on",
axis.ticks = element_blank(),
plot.margin = unit(c(1, 1, 2, 1), "cm")
) + xlim(ymd("2019-01-01", ymd("2020-12-31"))) +
scale_y_continuous(limits=c(300000, 360000),labels=label_comma(), breaks=seq(30e4,36e4, by = 2e4), expand=c(0,0)) +
labs(
x="", y="Dollars", title="Median sales price of houses sold in the United States",
subtitle="Not seasonally adjusted"
) +
coord_cartesian(clip = "off") +
annotate("text", label="2020", x=ymd("2020-10-01"), y=30e4, vjust=5, hjust=4.) +
annotate("text", label="2019", x=ymd("2020-10-01"), y=30e4, vjust=5, hjust=13.5) +
scale_x_date(date_breaks = "3 months", labels=quarter, breaks=c(1, 4, 7, 12), expand=c(0,0)) Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
install.packages("ggforce",repos = "http://cran.us.r-project.org")
The downloaded binary packages are in
/var/folders/_d/hvsdqqnd3jddpd6y2p2zdpt40000gn/T//RtmpzcCip1/downloaded_packages
library("ggforce")For sourcing the colors, I did not look this up online. I instead took a screenshot of your photo and did a color picker for the hexadecimal color. You can do so built in from Mac. I verified it from this: https://www.brandcolorcode.com/target
For this process, I made three circles filled in alternating with red, white, red, and layered them with smaller ones in the front.
library(knitr)
library("ggplot2")
library("ggforce")
knitr::opts_chunk$set(
fig.width = 7, # 7" width
fig.asp = .4, # the golden ratio
fig.retina = 3, # dpi multiplier for displaying HTML output on retina
fig.align = "center", # center align figures
dpi = 300 # higher dpi, sharper image
)
# creating a data frame
df <- data.frame(col1=sample(rep(c(1, 20, 40), each=26)),
col2=sample(rep(c(1: 6), each=13))
)
df <- data.frame(col1=c(0, 100), col2=c(0, 100))
# plotting the data
ggplot(df, aes(x=col1, y=col2)) +
geom_circle(aes(x0=50, y0=50, r=50), fill="#CC0000", color="#CC0000", inherit.aes=FALSE)+
geom_circle(aes(x0=50, y0=50, r=35), fill="white", color="white", inherit.aes=FALSE)+
geom_circle(aes(x0=50, y0=50, r=15), fill="#CC0000", color="#CC0000", inherit.aes=FALSE)+
coord_fixed(clip="off") +
theme (
panel.background = element_blank(),
axis.title = element_blank(),
legend.position="none",
axis.ticks.x = element_blank(),
axis.ticks.y=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
# plot.margin=margin(10, 10, 10, 10)
) +
annotate("text", label="TARGET", x=50, y=0, vjust=2, color="#CC0000", size=12, fontface="bold") +
geom_circle(aes(x0=88, y0=-23, r=4.5), fill="white", color="#CC0000", inherit.aes=FALSE, linewidth=1) +
annotate("text", label="R", x=88, y=-23, color="#CC0000", size=4, fontface="bold")Warning in geom_circle(aes(x0 = 50, y0 = 50, r = 50), fill = "#CC0000", : All aesthetics have length 1, but the data has 2 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_circle(aes(x0 = 50, y0 = 50, r = 35), fill = "white", color = "white", : All aesthetics have length 1, but the data has 2 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_circle(aes(x0 = 50, y0 = 50, r = 15), fill = "#CC0000", : All aesthetics have length 1, but the data has 2 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_circle(aes(x0 = 88, y0 = -23, r = 4.5), fill = "white", : All aesthetics have length 1, but the data has 2 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
install.packages("palmerpenguins",repos = "http://cran.us.r-project.org")
The downloaded binary packages are in
/var/folders/_d/hvsdqqnd3jddpd6y2p2zdpt40000gn/T//RtmpzcCip1/downloaded_packages
library(palmerpenguins)
Attaching package: 'palmerpenguins'
The following objects are masked from 'package:datasets':
penguins, penguins_raw
penguins# A tibble: 344 × 8
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
<fct> <fct> <dbl> <dbl> <int> <int>
1 Adelie Torgersen 39.1 18.7 181 3750
2 Adelie Torgersen 39.5 17.4 186 3800
3 Adelie Torgersen 40.3 18 195 3250
4 Adelie Torgersen NA NA NA NA
5 Adelie Torgersen 36.7 19.3 193 3450
6 Adelie Torgersen 39.3 20.6 190 3650
7 Adelie Torgersen 38.9 17.8 181 3625
8 Adelie Torgersen 39.2 19.6 195 4675
9 Adelie Torgersen 34.1 18.1 193 3475
10 Adelie Torgersen 42 20.2 190 4250
# ℹ 334 more rows
# ℹ 2 more variables: sex <fct>, year <int>
ggplot(penguins, aes(x=bill_depth_mm, y=bill_length_mm, color=flipper_length_mm)) +
geom_point()Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).
ggplot(penguins, aes(x=bill_depth_mm, y=bill_length_mm, color=flipper_length_mm)) +
geom_point(shape="\u003C", size=4) +
scale_color_gradient(low="pink", high="orange") +
scale_x_continuous(limits=c(0, 40)) +
theme(
panel.background = element_rect(fill="red"),
panel.grid.major.x = element_line(color="green"),
panel.grid.major.y = element_line(color="blue", linewidth = 2),
legend.background = element_rect()
)Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).